import scrapy

from scrapy import Selector

from spider_0925.items import Spider0925Item

class DoubanSpider(scrapy.Spider):
    """Scrape the Douban movie Top 250 listing, one page at a time.

    Every movie entry on a page is emitted as a ``Spider0925Item`` carrying
    the ``title``, ``score`` and ``main_idea`` fields; the spider then
    follows the "next page" link until the page no longer provides one.
    """

    name = 'douban'
    allowed_domains = ['movie.douban.com']
    start_urls = ['https://movie.douban.com/top250']

    def parse(self, response):
        """Yield one item per movie on the page, then request the next page.

        Args:
            response: The downloaded listing page.

        Yields:
            ``Spider0925Item`` objects for each movie entry, followed by a
            ``scrapy.Request`` for the next page when a next link exists.
        """
        # The response exposes .css()/.xpath() directly, so no explicit
        # Selector wrapper is needed.
        for movie_li in response.css('#content .grid_view .item'):
            spider_item = Spider0925Item()
            # extract_first() gives None when an entry lacks the element,
            # so a missing field does not abort the whole page.
            spider_item['title'] = movie_li.css('span.title::text').extract_first()
            spider_item['score'] = movie_li.css('span.rating_num::text').extract_first()
            spider_item['main_idea'] = movie_li.css('span.inq::text').extract_first()
            yield spider_item

        # Check whether the page has a next-page link: crawl it when it
        # exists, stop when it does not (or when the href is empty).
        next_link = response.xpath("//span[@class='next']/link/@href").extract_first()
        if next_link:
            # Resolve the href against the current page URL instead of
            # concatenating onto a hard-coded base, so relative and absolute
            # hrefs are both handled. The duplicate filter is left enabled
            # so a repeated link cannot cause an endless crawl loop.
            yield scrapy.Request(response.urljoin(next_link), callback=self.parse)
